# Import the necessary libraries
library(readr)
library(dplyr)
library(tidyr)
library(tseries)
library(vars)
library(ggplot2)
library(plotly)
library(stargazer)
library(gridExtra)
library(forecast)
library(lmtest)

Clean and transform the data

# Import the data
data <- read_csv('Sample Media Spend Data.csv')
head(data)
## # A tibble: 6 × 10
##   Division Calen…¹ Paid_…² Organ…³ Googl…⁴ Email…⁵ Faceb…⁶ Affil…⁷ Overa…⁸ Sales
##   <chr>    <chr>     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>
## 1 A        1/6/20…     392     422     408 349895.   73580   12072     682 59417
## 2 A        1/13/2…     787     904     110 506270.   11804    9499     853 56806
## 3 A        1/20/2…      81     970     742 430042.   52232   17048     759 48715
## 4 A        1/27/2…      25     575      65 417746.   78640   10207     942 72047
## 5 A        2/3/20…     565     284     295 408506.   40561    5834     658 56235
## 6 A        2/10/2…     256     330     683 434730.   36750    8469     691 56347
## # … with abbreviated variable names ¹​Calendar_Week, ²​Paid_Views,
## #   ³​Organic_Views, ⁴​Google_Impressions, ⁵​Email_Impressions,
## #   ⁶​Facebook_Impressions, ⁷​Affiliate_Impressions, ⁸​Overall_Views
dim(data)
## [1] 3051   10
summary(data)
##    Division         Calendar_Week        Paid_Views     Organic_Views     
##  Length:3051        Length:3051        Min.   :     1   Min.   :     1.0  
##  Class :character   Class :character   1st Qu.:   537   1st Qu.:   712.5  
##  Mode  :character   Mode  :character   Median :  2699   Median :  4110.0  
##                                        Mean   : 15094   Mean   : 13355.7  
##                                        3rd Qu.: 17358   3rd Qu.: 16230.5  
##                                        Max.   :518190   Max.   :270453.0  
##  Google_Impressions Email_Impressions Facebook_Impressions
##  Min.   :       7   Min.   :  40894   Min.   :     29     
##  1st Qu.:  169828   1st Qu.: 378497   1st Qu.:  57074     
##  Median :  490531   Median : 590971   Median : 127523     
##  Mean   :  886174   Mean   : 760509   Mean   : 269127     
##  3rd Qu.: 1022622   3rd Qu.: 962247   3rd Qu.: 283505     
##  Max.   :17150439   Max.   :7317730   Max.   :7558435     
##  Affiliate_Impressions Overall_Views        Sales        
##  Min.   :   910        Min.   :     2   Min.   :  15436  
##  1st Qu.:  9127        1st Qu.:   747   1st Qu.:  73394  
##  Median : 16658        Median :  7879   Median : 113573  
##  Mean   : 22911        Mean   : 27981   Mean   : 185901  
##  3rd Qu.: 27486        3rd Qu.: 34112   3rd Qu.: 202976  
##  Max.   :175791        Max.   :635057   Max.   :3575430

We want to check for any missing or duplicate data.

# Check for missing data
sum(is.na(data))
## [1] 0
# Check for duplicate data
# Flag whether any row is an exact copy of an earlier row
has_dupes <- anyDuplicated(data) > 0
if (!has_dupes) {
  print("No duplicate data found.")
} else {
  print("Duplicate data found.")
}
## [1] "No duplicate data found."

We check the number of unique inputs in the Division column and how many rows correspond to each input.

table(data$Division)
## 
##   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   P   Q   R   S   T 
## 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 
##   U   V   W   X   Y   Z 
## 113 113 113 113 113 226

There are 26 divisions/regions/states, with an identical amount of data for the first 25 regions, but more data for division Z. The dataset is only over 113 weeks. Thus, we have a closer look at the data for division Z.

# Create a subset of the data with only division Z 
data_Z <- data[data$Division == 'Z', ]
dim(data_Z)
## [1] 226  10
data_Z
## # A tibble: 226 × 10
##    Division Calendar_W…¹ Paid_…² Organ…³ Googl…⁴ Email…⁵ Faceb…⁶ Affil…⁷ Overa…⁸
##    <chr>    <chr>          <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 Z        1/6/2018         165     346     440  9.39e5  156226   43811     223
##  2 Z        1/13/2018        101     571     347  1.36e6   24364   37350     611
##  3 Z        1/20/2018        873     128     716  1.15e6   81574   57746     344
##  4 Z        1/27/2018        232     149      64  1.12e6  139465   34921     199
##  5 Z        2/3/2018         773     509     233  1.10e6   80376   27728     786
##  6 Z        2/10/2018        660       3     379  1.17e6   79818   35288     553
##  7 Z        2/17/2018        225     508     639  1.70e6  185419   33811     699
##  8 Z        2/24/2018        123     316     439  1.49e6     139   26692     425
##  9 Z        3/3/2018         390     725     700  1.14e6   28194   28972     938
## 10 Z        3/10/2018        333     393     614  1.27e6  120648   31188     386
## # … with 216 more rows, 1 more variable: Sales <dbl>, and abbreviated variable
## #   names ¹​Calendar_Week, ²​Paid_Views, ³​Organic_Views, ⁴​Google_Impressions,
## #   ⁵​Email_Impressions, ⁶​Facebook_Impressions, ⁷​Affiliate_Impressions,
## #   ⁸​Overall_Views
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names

It seems division Z has 2 entries for each date but with different values in each column. We thus choose to focus on another division for clarity and ease of use, considering not much information is provided about the dataset.

# Create a subset of the data with only division A
data_A <- data[data$Division == 'A', ]
dim(data_A)
## [1] 113  10
data_A
## # A tibble: 113 × 10
##    Divis…¹ Calen…² Paid_…³ Organ…⁴ Googl…⁵ Email…⁶ Faceb…⁷ Affil…⁸ Overa…⁹ Sales
##    <chr>   <chr>     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>
##  1 A       1/6/20…     392     422     408 349895.   73580   12072     682 59417
##  2 A       1/13/2…     787     904     110 506270.   11804    9499     853 56806
##  3 A       1/20/2…      81     970     742 430042.   52232   17048     759 48715
##  4 A       1/27/2…      25     575      65 417746.   78640   10207     942 72047
##  5 A       2/3/20…     565     284     295 408506.   40561    5834     658 56235
##  6 A       2/10/2…     256     330     683 434730.   36750    8469     691 56347
##  7 A       2/17/2…     886      56     664 634433.  112489    8331     685 81604
##  8 A       2/24/2…     336      99     470 555036.     218    6319     569 80492
##  9 A       3/3/20…     305     209     501 423690.   13065    7898     772 61804
## 10 A       3/10/2…     955     283     609 471730.   84449    8428     833 64944
## # … with 103 more rows, and abbreviated variable names ¹​Division,
## #   ²​Calendar_Week, ³​Paid_Views, ⁴​Organic_Views, ⁵​Google_Impressions,
## #   ⁶​Email_Impressions, ⁷​Facebook_Impressions, ⁸​Affiliate_Impressions,
## #   ⁹​Overall_Views
## # ℹ Use `print(n = ...)` to see more rows

To gain in ease of analysis, we further transform the data and replace the Calendar_Week column with a number from 1 to 113, corresponding to the weeks recorded. Thus, there are 113 weeks of data, corresponding to 2 years and 9 weeks of data. We also remove the Division and Overall Views columns as their presence is unnecessary. Indeed, the Overall Views column represents the total number of views that a campaign received across all channels. Here however, we want to analyze the relationships and interactions between the different marketing channels. Therefore, it is appropriate to exclude it from our dataset.

# Replace Calendar_Week with a 1..n week index, drop the columns we do not
# need, and move Week to the front -- all in one dplyr pipeline
data_A <- data_A %>%
  mutate(Week = row_number()) %>%
  select(-Calendar_Week, -Division, -Overall_Views) %>%
  relocate(Week)
data_A
## # A tibble: 113 × 8
##     Week Paid_Views Organic_Views Google_Impress…¹ Email…² Faceb…³ Affil…⁴ Sales
##    <int>      <dbl>         <dbl>            <dbl>   <dbl>   <dbl>   <dbl> <dbl>
##  1     1        392           422              408 349895.   73580   12072 59417
##  2     2        787           904              110 506270.   11804    9499 56806
##  3     3         81           970              742 430042.   52232   17048 48715
##  4     4         25           575               65 417746.   78640   10207 72047
##  5     5        565           284              295 408506.   40561    5834 56235
##  6     6        256           330              683 434730.   36750    8469 56347
##  7     7        886            56              664 634433.  112489    8331 81604
##  8     8        336            99              470 555036.     218    6319 80492
##  9     9        305           209              501 423690.   13065    7898 61804
## 10    10        955           283              609 471730.   84449    8428 64944
## # … with 103 more rows, and abbreviated variable names ¹​Google_Impressions,
## #   ²​Email_Impressions, ³​Facebook_Impressions, ⁴​Affiliate_Impressions
## # ℹ Use `print(n = ...)` to see more rows

Understand the data

## Plot the Sales data
p1 <- ggplot(data_A, aes(x = Week, y = Sales)) +
  geom_line() +
  labs(title = "Sales over Time", x = "Week", y = "Sales")
ggplotly(p1)
# Inspect the data patterns for the marketing channels where the spend was made
## Create a new data frame with only the columns of interest
df <- data_A[, c("Week", "Paid_Views", "Organic_Views", "Google_Impressions", 
                 "Email_Impressions", "Facebook_Impressions", "Affiliate_Impressions")]

## Reshape the data frame to long format; pivot_longer() supersedes gather()
df_long <- pivot_longer(df, cols = -Week, names_to = "Channel", values_to = "Value")

## Plot all channels on one chart, coloured by channel
p2 <- ggplot(df_long, aes(x = Week, y = Value, color = Channel)) +
  geom_line() +
  labs(x = "Week", y = "Value", title = "Marketing Channel Patterns Over Time")
ggplotly(p2)

From the above output, it seems that the peaks in sales coincide most closely with rises in Google and Facebook Impressions. Nevertheless, fitting the VAR model will help determine this with greater precision.

Ensure the stationarity of the data

We start by taking the log of each variable to stabilise the variance of each variable over time. First, however, let us check whether there are negative or null values.

# log() requires strictly positive inputs, so verify no value is <= 0.
# (All remaining columns are numeric at this point, so the comparison is valid.)
if (all(data_A > 0)) {
  print("The dataset does not contain negative or 0 values.")
} else {
  print("The dataset contains negative or 0 values.")
}
## [1] "The dataset does not contain negative or 0 values."

The absence of negative or null values lets us directly apply log-transformation without adding one prior to doing so. Indeed, if there were null or negative values, it would be necessary to add 1 to avoid undefined outputs (and so, an error).

# Log-transformation: create an L-prefixed log version of each series,
# in the same column order as before
vars_to_log <- c("Sales", "Paid_Views", "Organic_Views", "Google_Impressions",
                 "Email_Impressions", "Facebook_Impressions",
                 "Affiliate_Impressions")
for (v in vars_to_log) {
  data_A[[paste0("L", v)]] <- log(data_A[[v]])
}

Once the log-transformation is done, we check the seasonal decomposition of the data. This will allow to determine whether we need to apply first-order or seasonal differencing to further stationarise the data.

Sales

# Weekly ts object for the log of Sales (frequency 52 = yearly cycle)
LSales <- ts(data_A$LSales, frequency = 52)

# Raw series plot
LSales.plot1 <- autoplot(LSales) + ggtitle('Log of Sales Over Time')

# STL decomposition plot (trend / seasonal / remainder panels)
LSales.plot2 <- autoplot(stl(LSales, s.window = "period"))

# Show both side by side
grid.arrange(LSales.plot1, LSales.plot2, ncol = 2)

There seems to be a clear trend, although the longer grey bar may suggest it is not as significant. There does however seem to be a seasonal component, bringing the necessity to apply a seasonal difference. We can check whether our analysis is correct by conducting several stationarity tests.

# Stationarity tests
adf.test(LSales) # Augmented Dickey-Fuller Test
## 
##  Augmented Dickey-Fuller Test
## 
## data:  LSales
## Dickey-Fuller = -2.9919, Lag order = 4, p-value = 0.1649
## alternative hypothesis: stationary
pp.test(LSales) # Phillips-Perron Unit Root Test
## 
##  Phillips-Perron Unit Root Test
## 
## data:  LSales
## Dickey-Fuller Z(alpha) = -27.116, Truncation lag parameter = 4, p-value
## = 0.01143
## alternative hypothesis: stationary
# NOTE: KPSS reverses the null hypothesis (H0 = level stationarity), so a
# small p-value here indicates NON-stationarity -- the opposite reading
# from the ADF/PP tests above
kpss.test(LSales) # KPSS Test for Level Stationarity
## 
##  KPSS Test for Level Stationarity
## 
## data:  LSales
## KPSS Level = 0.60744, Truncation lag parameter = 4, p-value = 0.02196
# Seasonal stationarity: nsdiffs() returns the number of seasonal differences required
nsdiffs(LSales)
## [1] 1

Two out of the three tests say that the data is not stationary. This is consistent with our visual analysis of the data, so we choose to apply first-order differencing. The nsdiffs function gives an output of 1, indicating that it is necessary to apply seasonal differencing as well.

# Apply differencing
LSales.diff <- diff(LSales, differences = 1) # First-order differencing
LSales.diff <- diff(LSales.diff, differences = 1, lag = 52) # Seasonal differencing for a yearly pattern with weekly data
# NOTE(review): the two diffs shorten the series from 113 to 113 - 1 - 52 = 60
# observations, which sharply limits the sample available to the VAR later on
# Check data is differenced: 0 from both tests means no further differencing is required
ndiffs(LSales.diff)
## [1] 0
nsdiffs(LSales.diff)
## [1] 0

Because both ndiffs and nsdiffs now return 0, no further differencing is required: the data is now stationary.

LSales
## Time Series:
## Start = c(1, 1) 
## End = c(3, 9) 
## Frequency = 52 
##   [1] 10.99234 10.94740 10.79374 11.18507 10.93729 10.93928 11.30963 11.29591
##   [9] 11.03172 11.08128 11.05002 11.08242 11.13418 10.88693 10.99122 11.19337
##  [17] 11.05049 11.20422 11.19932 11.05757 11.09635 11.12476 11.18880 11.11100
##  [25] 11.13932 11.41223 10.77411 11.61149 11.61962 11.14587 11.20297 10.99289
##  [33] 11.35328 11.47564 11.37651 11.19140 11.54014 11.46093 11.50458 11.66837
##  [41] 11.57038 11.76128 11.62978 11.96139 12.13820 12.24757 12.73172 12.49238
##  [49] 12.14090 12.15734 11.60775 11.10417 11.14923 10.92414 11.31978 11.16785
##  [57] 11.13017 11.06822 11.54827 11.30288 11.21976 10.99869 11.37586 11.32521
##  [65] 11.11336 11.28625 10.92013 11.23826 11.41833 11.29576 11.25314 11.09492
##  [73] 11.37838 11.17191 11.02564 11.52008 11.32904 11.15662 11.06756 11.48748
##  [81] 11.30017 11.12707 11.09351 10.85478 11.39041 11.64852 11.40607 11.51172
##  [89] 11.59583 11.35299 11.25152 12.00685 11.82162 11.87926 11.81847 11.68239
##  [97] 12.27802 12.28809 12.23593 12.86914 12.79052 12.52809 12.13691 11.78108
## [105] 11.76967 11.41841 11.56976 11.24181 11.19100 11.25949 12.03274 11.69189
## [113] 11.50647

Paid Views

# Weekly ts object for the log of Paid Views (frequency 52 = yearly cycle)
LPaid_Views <- ts(data_A$LPaid_Views, frequency = 52)

# Raw series plot
LPaid_Views.plot1 <- autoplot(LPaid_Views) + ggtitle('Log of Paid Views Over Time')

# STL decomposition plot (trend / seasonal / remainder panels)
LPaid_Views.plot2 <- autoplot(stl(LPaid_Views, s.window = "period"))

# Show both side by side
grid.arrange(LPaid_Views.plot1, LPaid_Views.plot2, ncol = 2)

There doesn’t seem to be a significant seasonal component although there may be a trend component (shown by the shorter grey bar), which we can check with stationarity and seasonal stationarity tests.

# Stationarity tests
adf.test(LPaid_Views) # Augmented Dickey-Fuller Test
## 
##  Augmented Dickey-Fuller Test
## 
## data:  LPaid_Views
## Dickey-Fuller = -2.7254, Lag order = 4, p-value = 0.2756
## alternative hypothesis: stationary
pp.test(LPaid_Views) # Phillips-Perron Unit Root Test
## 
##  Phillips-Perron Unit Root Test
## 
## data:  LPaid_Views
## Dickey-Fuller Z(alpha) = -42.401, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
# NOTE: KPSS reverses the null hypothesis (H0 = level stationarity), so its
# small p-value here indicates NON-stationarity
kpss.test(LPaid_Views) # KPSS Test for Level Stationarity
## 
##  KPSS Test for Level Stationarity
## 
## data:  LPaid_Views
## KPSS Level = 1.8655, Truncation lag parameter = 4, p-value = 0.01
# Seasonal stationarity: nsdiffs() returns the number of seasonal differences required
nsdiffs(LPaid_Views)
## [1] 0

Both the ADF and KPSS tests suggest the data is not stationary, whereas the PP test suggests the data is stationary. Taking a majority rule, we apply first-order differencing. Furthermore, the nsdiffs function suggests no seasonal differencing is necessary.

# Apply first-order differencing
LPaid_Views.diff <- diff(LPaid_Views, differences = 1)
# Check data is differenced: 0 from both tests means no further differencing is required
ndiffs(LPaid_Views.diff)
## [1] 0
nsdiffs(LPaid_Views.diff)
## [1] 0

Because both ndiffs and nsdiffs now return 0, no further differencing is required: the data is now stationary.

Organic_Views

# Weekly ts object for the log of Organic Views (frequency 52 = yearly cycle)
LOrganic_Views <- ts(data_A$LOrganic_Views, frequency = 52)

# Raw series plot
LOrganic_Views.plot1 <- autoplot(LOrganic_Views) + ggtitle('Log of Organic Views Over \nTime')

# STL decomposition plot (trend / seasonal / remainder panels)
LOrganic_Views.plot2 <- autoplot(stl(LOrganic_Views, s.window = "period"))

# Show both side by side
grid.arrange(LOrganic_Views.plot1, LOrganic_Views.plot2, ncol = 2)

There doesn’t seem to be a significant seasonal component. However, there may be a trend component, demonstrated by the shorter grey bar on the right side of the ‘trend’ panel. Therefore, we may need to apply first order differencing, which we can check with stationarity and seasonal stationarity tests.

# Stationarity tests
adf.test(LOrganic_Views) # Augmented Dickey-Fuller Test
## 
##  Augmented Dickey-Fuller Test
## 
## data:  LOrganic_Views
## Dickey-Fuller = -3.3081, Lag order = 4, p-value = 0.07338
## alternative hypothesis: stationary
pp.test(LOrganic_Views) # Phillips-Perron Unit Root Test
## 
##  Phillips-Perron Unit Root Test
## 
## data:  LOrganic_Views
## Dickey-Fuller Z(alpha) = -62.955, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
# NOTE: KPSS reverses the null hypothesis (H0 = level stationarity), so its
# small p-value here indicates NON-stationarity
kpss.test(LOrganic_Views) # KPSS Test for Level Stationarity
## 
##  KPSS Test for Level Stationarity
## 
## data:  LOrganic_Views
## KPSS Level = 2.1264, Truncation lag parameter = 4, p-value = 0.01
# Seasonal stationarity: nsdiffs() returns the number of seasonal differences required
nsdiffs(LOrganic_Views)
## [1] 0

With a p-value larger than 0.05 for the ADF test and smaller than 0.05 for the KPSS test, both point to non-stationarity; only the PP test (p-value smaller than 0.05) points to stationarity. The LOrganic_Views data is therefore most likely not stationary. It is, however, stationary in terms of seasonality. We thus need to apply first-order differencing, but not seasonal differencing, to the LOrganic_Views data.

# Apply first-order differencing
LOrganic_Views.diff <- diff(LOrganic_Views, differences = 1)
# Check data is differenced: 0 from both tests means no further differencing is required
ndiffs(LOrganic_Views.diff)
## [1] 0
nsdiffs(LOrganic_Views.diff)
## [1] 0

Because both ndiffs and nsdiffs now return 0, no further differencing is required: the data is now stationary.

Google_Impressions

# Weekly ts object for the log of Google Impressions (frequency 52 = yearly cycle)
LGoogle_Impressions <- ts(data_A$LGoogle_Impressions, frequency = 52)

# Raw series plot
LGoogle_Impressions.plot1 <- autoplot(LGoogle_Impressions) + ggtitle('Log of Google Impressions \nOver Time')

# STL decomposition plot (trend / seasonal / remainder panels)
LGoogle_Impressions.plot2 <- autoplot(stl(LGoogle_Impressions, s.window = "period"))

# Show both side by side
grid.arrange(LGoogle_Impressions.plot1, LGoogle_Impressions.plot2, ncol = 2)

There seems to be a trend component, but no seasonal component. Therefore, we may need to apply first-order differencing.

# Stationarity tests
adf.test(LGoogle_Impressions) # Augmented Dickey-Fuller Test
## 
##  Augmented Dickey-Fuller Test
## 
## data:  LGoogle_Impressions
## Dickey-Fuller = -3.0443, Lag order = 4, p-value = 0.1432
## alternative hypothesis: stationary
pp.test(LGoogle_Impressions) # Phillips-Perron Unit Root Test
## 
##  Phillips-Perron Unit Root Test
## 
## data:  LGoogle_Impressions
## Dickey-Fuller Z(alpha) = -7.8075, Truncation lag parameter = 4, p-value
## = 0.6642
## alternative hypothesis: stationary
# NOTE: KPSS reverses the null hypothesis (H0 = level stationarity), so its
# small p-value here indicates NON-stationarity
kpss.test(LGoogle_Impressions) # KPSS Test for Level Stationarity
## 
##  KPSS Test for Level Stationarity
## 
## data:  LGoogle_Impressions
## KPSS Level = 0.99106, Truncation lag parameter = 4, p-value = 0.01
# Seasonal stationarity: nsdiffs() returns the number of seasonal differences required
nsdiffs(LGoogle_Impressions)
## [1] 0

The ADF (p-value = 0.1432 > 0.05), PP (p-value = 0.6642 > 0.05), and KPSS (p-value = 0.01 < 0.05) tests all suggest the data is not stationary. Therefore, with our prior analysis, we apply first-order differencing. It is unnecessary to apply seasonal differencing.

# Apply first-order differencing
LGoogle_Impressions.diff <- diff(LGoogle_Impressions, differences = 1)
# Check data is differenced: 0 from both tests means no further differencing is required
ndiffs(LGoogle_Impressions.diff)
## [1] 0
nsdiffs(LGoogle_Impressions.diff)
## [1] 0

Because both ndiffs and nsdiffs now return 0, no further differencing is required: the data is now stationary.

Email_Impressions

# Weekly ts object for the log of Email Impressions (frequency 52 = yearly cycle)
LEmail_Impressions <- ts(data_A$LEmail_Impressions, frequency = 52)

# Raw series plot
LEmail_Impressions.plot1 <- autoplot(LEmail_Impressions) + ggtitle('Log of Email Impressions \nOver Time')

# STL decomposition plot (trend / seasonal / remainder panels)
LEmail_Impressions.plot2 <- autoplot(stl(LEmail_Impressions, s.window = "period"))

# Show both side by side
grid.arrange(LEmail_Impressions.plot1, LEmail_Impressions.plot2, ncol = 2)

There doesn’t appear to be a trend or a seasonal component in the data.

# Stationarity tests
adf.test(LEmail_Impressions) # Augmented Dickey-Fuller Test
## 
##  Augmented Dickey-Fuller Test
## 
## data:  LEmail_Impressions
## Dickey-Fuller = -3.4772, Lag order = 4, p-value = 0.04741
## alternative hypothesis: stationary
pp.test(LEmail_Impressions) # Phillips-Perron Unit Root Test
## 
##  Phillips-Perron Unit Root Test
## 
## data:  LEmail_Impressions
## Dickey-Fuller Z(alpha) = -70.972, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
# NOTE: KPSS reverses the null hypothesis (H0 = level stationarity), so its
# LARGE p-value here (0.1) agrees with the small ADF/PP p-values: stationary
kpss.test(LEmail_Impressions) # KPSS Test for Level Stationarity
## 
##  KPSS Test for Level Stationarity
## 
## data:  LEmail_Impressions
## KPSS Level = 0.26251, Truncation lag parameter = 4, p-value = 0.1
# Seasonal stationarity: nsdiffs() returns the number of seasonal differences required
nsdiffs(LEmail_Impressions)
## [1] 0

All tests suggest the data is stationary, thus we do not conduct first-order differencing. Again, we do not need to apply seasonal differencing.

Facebook_Impressions

# Weekly ts object for the log of Facebook Impressions (frequency 52 = yearly cycle)
LFacebook_Impressions <- ts(data_A$LFacebook_Impressions, frequency = 52)

# Raw series plot
LFacebook_Impressions.plot1 <- autoplot(LFacebook_Impressions) + ggtitle('Log of Facebook \nImpressions Over Time')

# STL decomposition plot (trend / seasonal / remainder panels)
LFacebook_Impressions.plot2 <- autoplot(stl(LFacebook_Impressions, s.window = "period"))

# Show both side by side
grid.arrange(LFacebook_Impressions.plot1, LFacebook_Impressions.plot2, ncol = 2)

There is a clear trend in the data although this may not be significant due to the long grey bar. We check this below with stationarity tests.

# Stationarity tests
adf.test(LFacebook_Impressions) # Augmented Dickey-Fuller Test
## 
##  Augmented Dickey-Fuller Test
## 
## data:  LFacebook_Impressions
## Dickey-Fuller = -3.5554, Lag order = 4, p-value = 0.0404
## alternative hypothesis: stationary
pp.test(LFacebook_Impressions) # Phillips-Perron Unit Root Test
## 
##  Phillips-Perron Unit Root Test
## 
## data:  LFacebook_Impressions
## Dickey-Fuller Z(alpha) = -72.907, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
# NOTE: KPSS reverses the null hypothesis (H0 = level stationarity), so its
# small p-value here indicates NON-stationarity, opposing the ADF/PP results
kpss.test(LFacebook_Impressions) # KPSS Test for Level Stationarity
## 
##  KPSS Test for Level Stationarity
## 
## data:  LFacebook_Impressions
## KPSS Level = 1.1317, Truncation lag parameter = 4, p-value = 0.01
# Seasonal stationarity: nsdiffs() returns the number of seasonal differences required
nsdiffs(LFacebook_Impressions)
## [1] 0

Both the ADF and PP tests suggest the data is stationary, unlike the KPSS test. Although a strict majority rule would point to stationarity, the KPSS result together with the trend identified in the decomposition leads us, out of caution, to apply first-order differencing.

# Apply first-order differencing
LFacebook_Impressions.diff <- diff(LFacebook_Impressions, differences = 1)
# Check data is differenced: 0 from both tests means no further differencing is required
ndiffs(LFacebook_Impressions.diff)
## [1] 0
nsdiffs(LFacebook_Impressions.diff)
## [1] 0

Because both ndiffs and nsdiffs now return 0, no further differencing is required: the data is now stationary.

Affiliate_Impressions

# Weekly ts object for the log of Affiliate Impressions (frequency 52 = yearly cycle)
LAffiliate_Impressions <- ts(data_A$LAffiliate_Impressions, frequency = 52)

# Raw series plot
LAffiliate_Impressions.plot1 <- autoplot(LAffiliate_Impressions) + ggtitle('Log of Affiliate Impressions \nOver Time')

# STL decomposition plot (trend / seasonal / remainder panels)
LAffiliate_Impressions.plot2 <- autoplot(stl(LAffiliate_Impressions, s.window = "period"))

# Show both side by side
grid.arrange(LAffiliate_Impressions.plot1, LAffiliate_Impressions.plot2, ncol = 2)

There doesn’t seem to be a significant seasonal component. However, there is a clear trend component. Therefore, we may need to apply first order differencing, which we can check with stationarity and seasonal stationarity tests.

# Stationarity tests
adf.test(LAffiliate_Impressions) # Augmented Dickey-Fuller Test
## 
##  Augmented Dickey-Fuller Test
## 
## data:  LAffiliate_Impressions
## Dickey-Fuller = -2.8295, Lag order = 4, p-value = 0.2324
## alternative hypothesis: stationary
pp.test(LAffiliate_Impressions) # Phillips-Perron Unit Root Test
## 
##  Phillips-Perron Unit Root Test
## 
## data:  LAffiliate_Impressions
## Dickey-Fuller Z(alpha) = -50.75, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
# NOTE: KPSS reverses the null hypothesis (H0 = level stationarity), so its
# small p-value here indicates NON-stationarity
kpss.test(LAffiliate_Impressions) # KPSS Test for Level Stationarity
## 
##  KPSS Test for Level Stationarity
## 
## data:  LAffiliate_Impressions
## KPSS Level = 0.49578, Truncation lag parameter = 4, p-value = 0.04262
# Seasonal stationarity: nsdiffs() returns the number of seasonal differences required
nsdiffs(LAffiliate_Impressions)
## [1] 0

Although according to the PP test we should reject the null hypothesis that the data is not stationary, because the ADF and KPSS tests suggest otherwise and we identified a clear negative trend in the data, we apply first-order differencing. We do not need to apply seasonal differencing.

# Apply first-order differencing
LAffiliate_Impressions.diff <- diff(LAffiliate_Impressions, differences=1)
# Check data is differenced: 0 from both tests means no further differencing is required
ndiffs(LAffiliate_Impressions.diff)
## [1] 0
nsdiffs(LAffiliate_Impressions.diff)
## [1] 0

Because both ndiffs and nsdiffs now return 0, no further differencing is required: the data is now stationary.

Summary of the variables used

To summarise, we applied first-order differencing to all variables apart from LEmail_Impressions. The differenced log-transformed variables should now be interpreted as approximate period-over-period growth rates rather than levels.

Construct the VAR model

# Build the dataset for the VAR model: cbind() aligns the differenced series
# by time index, padding the shorter ones (notably LSales.diff, which lost
# 53 observations to differencing) with NA.
# The original window() call here was a no-op -- na.omit() is what actually
# removes the incomplete rows.
data.ts <- cbind(LPaid_Views.diff, LOrganic_Views.diff, LGoogle_Impressions.diff,
                 LEmail_Impressions, LFacebook_Impressions.diff,
                 LAffiliate_Impressions.diff, LSales.diff)
data.ts <- na.omit(data.ts) # keep only rows where every series has a value

With the above variables, we construct the VAR model.

# Conduct the model and store the results
# NOTE(review): with lag.max = 1 the AIC criterion has nothing to choose
# between -- this always fits a VAR(1). Increase lag.max to let the
# information criterion actually select the lag order.
var_model <- vars::VAR(data.ts, ic = "AIC", lag.max=1, type="const")
lmp <- var_model$varresult # list of per-equation lm fits, one per variable

# Regression table: one column per dependent variable, in cbind order
stargazer(lmp, type = "text", dep.var.labels.include = FALSE)
## 
## =============================================================================================
##                                                     Dependent variable:                      
##                                --------------------------------------------------------------
##                                  (1)     (2)      (3)      (4)      (5)      (6)       (7)   
## ---------------------------------------------------------------------------------------------
## LPaid_Views.diff.l1            -0.078   0.093   -0.041    -0.026   0.096   -0.015    -0.0002 
##                                (0.143) (0.060)  (0.071)  (0.064)  (0.139)  (0.038)   (0.066) 
##                                                                                              
## LOrganic_Views.diff.l1         -0.043  -0.007   -0.056    -0.094  -0.067   -0.044    -0.075  
##                                (0.334) (0.139)  (0.165)  (0.151)  (0.326)  (0.088)   (0.154) 
##                                                                                              
## LGoogle_Impressions.diff.l1     0.103   0.087    0.054    0.131   -0.004    0.034     0.001  
##                                (0.265) (0.110)  (0.130)  (0.119)  (0.258)  (0.070)   (0.122) 
##                                                                                              
## LEmail_Impressions.l1           0.217  -0.034   -0.019   0.390*** 0.571*   -0.063    -0.036  
##                                (0.292) (0.122)  (0.144)  (0.132)  (0.285)  (0.077)   (0.135) 
##                                                                                              
## LFacebook_Impressions.diff.l1  -0.086   0.013   -0.044    0.036   -0.058    0.058     0.079  
##                                (0.149) (0.062)  (0.074)  (0.067)  (0.146)  (0.039)   (0.069) 
##                                                                                              
## LAffiliate_Impressions.diff.l1  0.333   0.070    0.103    0.191    0.316  -0.482***  -0.127  
##                                (0.497) (0.208)  (0.245)  (0.224)  (0.485)  (0.131)   (0.230) 
##                                                                                              
## LSales.diff.l1                  0.083   0.005  -0.446***  -0.144   0.049    0.060   -0.429***
##                                (0.282) (0.118)  (0.139)  (0.127)  (0.275)  (0.074)   (0.130) 
##                                                                                              
## const                          -2.758   0.467    0.253   7.795*** -7.273*   0.800     0.461  
##                                (3.739) (1.560)  (1.844)  (1.686)  (3.644)  (0.982)   (1.728) 
##                                                                                              
## ---------------------------------------------------------------------------------------------
## Observations                     59      59       59        59      59       59        59    
## R2                              0.040   0.064    0.184    0.183    0.085    0.245     0.220  
## Adjusted R2                    -0.091  -0.064    0.071    0.071   -0.041    0.142     0.113  
## Residual Std. Error (df = 51)   0.630   0.263    0.310    0.284    0.613    0.165     0.291  
## F Statistic (df = 7; 51)        0.306   0.502    1.637    1.637    0.677   2.370**   2.054*  
## =============================================================================================
## Note:                                                             *p<0.1; **p<0.05; ***p<0.01

Carryover effects:

Marketing spending creates both positive and negative carryover effects, with -0.078 units for paid views, -0.007 for organic views, 0.054 for Google impressions, 0.39% for email impressions, -0.058 for facebook impressions, and -0.482 for affiliate impressions, where only email and affiliate impressions are significant at the 1% level.

Cross-over effects:

Paid views has a positive effect on organic views, however it is negligible where a unit increase in paid views increases organic views by 0.093 units. Its effect on facebook impressions is also positive where a unit increase in paid views increases facebook impressions by 0.096 units. It has a negative effect on Google impressions, where a unit increase decreases Google impressions by 0.041. Its effect on email and affiliate impressions are all also negative, where a unit increase in paid views leads to a decrease of 0.026% and 0.015 respectively.

Organic views has a negative effect on all channels, where a one unit increase in organic views decreases paid views by 0.043 units, Google impressions by 0.056, email impressions by 0.094%, facebook impressions by 0.067, and affiliate impressions by 0.044 units.

Google impressions only has a negative effect on facebook impressions, where a one unit increase in Google impressions decreases facebook impressions by 0.004 units. It has however a positive effect on all other marketing spending where a unit increase in Google impressions increases paid views, organic views, email impressions, and affiliate impressions by 0.103 units, 0.087, 0.131%, and 0.034 units respectively.

Email impressions has a negative effect on organic views, Google impressions, and affiliate impressions, where a 1% increase in email impressions decreases organic views, Google impressions, and affiliate impressions by 0.034, 0.019, and 0.063 units respectively. A 1% increase in email impressions increases paid views and facebook impressions by 0.217 and 0.571.

A unit increase in facebook impressions increases organic views by 0.013 units, email impressions by 0.036%, and affiliate impressions by 0.058 units. It decreases paid views and Google impressions by 0.086 and 0.044 units respectively.

Finally, affiliate impressions positively affects all other channels. A unit increase in affiliate impressions increases paid views by 0.333 units, organic views by 0.070 units, Google impressions by 0.103 units, email impressions by 0.191%, and facebook impressions by 0.316 units.

We can note that only one of these crossover effects is significant: the effect of email impressions on facebook impressions. All other crossover effects are statistically negligible.

Feedback Effects:

Sales have both negative and positive feedback effects on the marketing spending. A unit increase in past sales growth positively impacts paid views, organic views, facebook impressions, and affiliate impressions in the following period by 0.083 units, 0.005 units, 0.049 units, and 0.060 units respectively, while it negatively impacts Google and email impressions by 0.446 units and 0.144%. The effect of sales on Google impressions is significant at the 1% level.

Direct Impact:

Google impressions and facebook impressions have a positive impact on sales with a unit increase in Google impressions increasing sales by 0.001 units and a unit increase in facebook impressions increasing sales by 0.079. Paid views, organic views, email impressions, and affiliate impressions have a negative impact on sales where a unit increase in paid views decreases sales growth by 0.0002 units, a unit increase in organic views decreases sales growth by 0.075 units, a 1% increase in email impressions decreases sales growth by 0.036%, and a unit increase in affiliate impression decreases sales by 0.127 units. Here, all effects are negligible.

Check the model is satisfactory

# Check the residuals
# Extract the residuals of the sales equation from the fitted VAR and turn
# them into a weekly time series (frequency = 52) for plotting.
sales.residuals <- data.frame(residuals(var_model))$LSales
sales.residuals <- ts(sales.residuals, frequency = 52)
# A mean of (approximately) zero indicates the model leaves no systematic bias.
round(mean(sales.residuals),4)
## [1] 0
autoplot(sales.residuals)

The residual analysis is satisfactory, with a mean of 0. The model can therefore be accepted.

IRF Analysis

Analyse the IRF plots:

# Orthogonalised impulse responses of sales growth (LSales.diff) to a shock in
# each marketing channel, traced 7 steps ahead, with bootstrapped 95% CIs
# (100 runs).
# NOTE(review): the email impulse is 'LEmail_Impressions' (log-levels) while
# every other impulse is a log-difference — confirm this matches the variables
# used in var_model.
irfs <- irf(var_model, impulse = c('LPaid_Views.diff', 'LOrganic_Views.diff', 'LGoogle_Impressions.diff', 'LEmail_Impressions', 'LFacebook_Impressions.diff', 'LAffiliate_Impressions.diff'), response = 'LSales.diff', runs = 100, n.ahead = 7 , ortho = TRUE, ci=0.95)
plot(irfs)

We analyse below the impact or shock of the impulse series (independent variables) on the response series (dependent variable) and how it progresses over time.

Granger Causality:

We can perform the Granger causality tests to confirm whether there is a causal relationship between the variables.

# Perform Granger causality tests
# Lag-1 tests in both directions for each channel: a significant F-statistic
# means the lagged predictor improves the one-step forecast of the dependent
# series beyond its own lag.
## Does Paid Views Granger-Cause Y?
grangertest(data.ts[, c("LPaid_Views.diff", "LSales.diff")], order = 1)
## Granger causality test
## 
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LPaid_Views.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.0427  0.837
# Reverse direction: does sales growth Granger-cause paid views?
grangertest(data.ts[, c("LSales.diff", "LPaid_Views.diff")], order = 1)
## Granger causality test
## 
## Model 1: LPaid_Views.diff ~ Lags(LPaid_Views.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LPaid_Views.diff ~ Lags(LPaid_Views.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.2625 0.6104
## Organic Views
# Both directions for organic views; neither is significant.
grangertest(data.ts[, c("LOrganic_Views.diff", "LSales.diff")], order = 1)
## Granger causality test
## 
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LOrganic_Views.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.1561 0.6942
grangertest(data.ts[, c("LSales.diff", "LOrganic_Views.diff")], order = 1)
## Granger causality test
## 
## Model 1: LOrganic_Views.diff ~ Lags(LOrganic_Views.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LOrganic_Views.diff ~ Lags(LOrganic_Views.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.0248 0.8756
## Google Impressions
grangertest(data.ts[, c("LGoogle_Impressions.diff", "LSales.diff")], order = 1)
## Granger causality test
## 
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LGoogle_Impressions.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.0067 0.9351
# This reverse direction is the only significant result in the whole set:
# sales growth Granger-causes Google impressions (p ~ 0.0016, 1% level).
grangertest(data.ts[, c("LSales.diff", "LGoogle_Impressions.diff")], order = 1)
## Granger causality test
## 
## Model 1: LGoogle_Impressions.diff ~ Lags(LGoogle_Impressions.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LGoogle_Impressions.diff ~ Lags(LGoogle_Impressions.diff, 1:1)
##   Res.Df Df      F   Pr(>F)   
## 1     56                      
## 2     57 -1 11.061 0.001561 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Email Impressions
# NOTE(review): email enters in log-levels here (LEmail_Impressions), not in
# differences like the other channels — confirm this is intentional.
grangertest(data.ts[, c("LEmail_Impressions", "LSales.diff")], order = 1)
## Granger causality test
## 
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LEmail_Impressions, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.0415 0.8394
grangertest(data.ts[, c("LSales.diff", "LEmail_Impressions")], order = 1)
## Granger causality test
## 
## Model 1: LEmail_Impressions ~ Lags(LEmail_Impressions, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LEmail_Impressions ~ Lags(LEmail_Impressions, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 1.1474 0.2887
## Facebook Impressions
# Both directions for facebook impressions; neither is significant.
grangertest(data.ts[, c("LFacebook_Impressions.diff", "LSales.diff")], order = 1)
## Granger causality test
## 
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LFacebook_Impressions.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 1.0606 0.3075
grangertest(data.ts[, c("LSales.diff", "LFacebook_Impressions.diff")], order = 1)
## Granger causality test
## 
## Model 1: LFacebook_Impressions.diff ~ Lags(LFacebook_Impressions.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LFacebook_Impressions.diff ~ Lags(LFacebook_Impressions.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.1876 0.6666
## Affiliate Impressions
# Both directions for affiliate impressions; neither is significant.
grangertest(data.ts[, c("LAffiliate_Impressions.diff", "LSales.diff")], order = 1)
## Granger causality test
## 
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LAffiliate_Impressions.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.0875 0.7685
grangertest(data.ts[, c("LSales.diff", "LAffiliate_Impressions.diff")], order = 1)
## Granger causality test
## 
## Model 1: LAffiliate_Impressions.diff ~ Lags(LAffiliate_Impressions.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LAffiliate_Impressions.diff ~ Lags(LAffiliate_Impressions.diff, 1:1)
##   Res.Df Df      F Pr(>F)
## 1     56                 
## 2     57 -1 0.5169 0.4751

The Granger causality tests evaluate whether the lagged values of the independent variable significantly predicts the dependent variable, LSales.diff. We can interpret the outcomes as follows:

Sales Granger-cause Google impressions (p ≈ 0.0016, significant at the 1% level), but do not Granger-cause any of the other marketing channels. Conversely, none of the marketing channels Granger-cause sales growth at conventional significance levels.

Evaluate the Intermediate and long-term effects:

# Make a table to summarize IRF coefficients and their confidence intervals for each potential marketing spending
# Each table lists the point estimate and bootstrap 95% CI bounds of the
# sales-growth response over periods 1-8 (impact period plus n.ahead = 7).
## Paid Views
Paid_Views.irf_table <- round(data.frame(period = seq(1, 8), 
                           response.Paid_Views = irfs$irf$LPaid_Views.diff, 
                           Paid_Views.lower = irfs$Lower$LPaid_Views.diff, 
                           Paid_Views.upper = irfs$Upper$LPaid_Views.diff),4)
colnames(Paid_Views.irf_table) <- c('Period', 'LPaid_Views.diff', 'LPaid_Views.diff Lower', 'LPaid_Views.diff Upper')
Paid_Views.irf_table
##   Period LPaid_Views.diff LPaid_Views.diff Lower LPaid_Views.diff Upper
## 1      1          -0.0246                -0.0896                 0.0576
## 2      2           0.0167                -0.0688                 0.0810
## 3      3          -0.0074                -0.0516                 0.0439
## 4      4           0.0013                -0.0249                 0.0274
## 5      5          -0.0006                -0.0164                 0.0123
## 6      6          -0.0003                -0.0093                 0.0083
## 7      7           0.0002                -0.0047                 0.0050
## 8      8          -0.0002                -0.0033                 0.0027
## Organic Views
# IRF point estimates and bootstrap 95% CI bounds for organic views.
Organic_Views.irf_table <- round(data.frame(period = seq(1, 8), 
                           response.Organic_Views = irfs$irf$LOrganic_Views.diff, 
                           Organic_Views.lower = irfs$Lower$LOrganic_Views.diff, 
                           Organic_Views.upper = irfs$Upper$LOrganic_Views.diff),4)
colnames(Organic_Views.irf_table) <- c('Period', 'LOrganic_Views.diff', 'LOrganic_Views.diff Lower', 'LOrganic_Views.diff Upper')
Organic_Views.irf_table
##   Period LOrganic_Views.diff LOrganic_Views.diff Lower
## 1      1             -0.0543                   -0.1158
## 2      2              0.0093                   -0.0678
## 3      3             -0.0025                   -0.0653
## 4      4             -0.0010                   -0.0270
## 5      5              0.0013                   -0.0156
## 6      6             -0.0011                   -0.0103
## 7      7              0.0007                   -0.0050
## 8      8             -0.0004                   -0.0033
##   LOrganic_Views.diff Upper
## 1                    0.0053
## 2                    0.0841
## 3                    0.0295
## 4                    0.0288
## 5                    0.0156
## 6                    0.0087
## 7                    0.0046
## 8                    0.0029
## Google Impressions
# IRF point estimates and bootstrap 95% CI bounds for Google impressions.
Google_Impressions.irf_table <- round(data.frame(period = seq(1, 8), 
                           response.Google_Impressions = irfs$irf$LGoogle_Impressions.diff, 
                           Google_Impressions.lower = irfs$Lower$LGoogle_Impressions.diff, 
                           Google_Impressions.upper = irfs$Upper$LGoogle_Impressions.diff),4)
colnames(Google_Impressions.irf_table) <- c('Period', 'LGoogle_Impressions.diff', 'LGoogle_Impressions.diff Lower', 'LGoogle_Impressions.diff Upper')
Google_Impressions.irf_table
##   Period LGoogle_Impressions.diff LGoogle_Impressions.diff Lower
## 1      1                  -0.0043                        -0.0599
## 2      2                   0.0072                        -0.0503
## 3      3                  -0.0089                        -0.0486
## 4      4                   0.0062                        -0.0092
## 5      5                  -0.0030                        -0.0177
## 6      6                   0.0017                        -0.0036
## 7      7                  -0.0007                        -0.0048
## 8      8                   0.0004                        -0.0021
##   LGoogle_Impressions.diff Upper
## 1                         0.0511
## 2                         0.0697
## 3                         0.0210
## 4                         0.0346
## 5                         0.0059
## 6                         0.0100
## 7                         0.0031
## 8                         0.0025
## Email Impressions
# IRF point estimates and bootstrap 95% CI bounds for email impressions
# (this impulse is in log-levels, matching the irf() call above).
Email_Impressions.irf_table <- round(data.frame(period = seq(1, 8), 
                           response.Email_Impressions = irfs$irf$LEmail_Impressions, 
                           Email_Impressions.lower = irfs$Lower$LEmail_Impressions, 
                           Email_Impressions.upper = irfs$Upper$LEmail_Impressions),4)
colnames(Email_Impressions.irf_table) <- c('Period', 'LEmail_Impressions', 'LEmail_Impressions Lower', 'LEmail_Impressions Upper')
Email_Impressions.irf_table
##   Period LEmail_Impressions LEmail_Impressions Lower LEmail_Impressions Upper
## 1      1             0.0541                  -0.0151                   0.1386
## 2      2            -0.0360                  -0.1041                   0.0455
## 3      3             0.0260                  -0.0053                   0.0678
## 4      4            -0.0093                  -0.0355                   0.0165
## 5      5             0.0052                  -0.0041                   0.0239
## 6      6            -0.0016                  -0.0117                   0.0057
## 7      7             0.0007                  -0.0027                   0.0068
## 8      8            -0.0001                  -0.0038                   0.0023
## Facebook Impressions
# IRF point estimates and bootstrap 95% CI bounds for facebook impressions.
Facebook_Impressions.irf_table <- round(data.frame(period = seq(1, 8), 
                           response.Facebook_Impressions = irfs$irf$LFacebook_Impressions.diff, 
                           Facebook_Impressions.lower = irfs$Lower$LFacebook_Impressions.diff, 
                           Facebook_Impressions.upper = irfs$Upper$LFacebook_Impressions.diff),4)
colnames(Facebook_Impressions.irf_table) <- c('Period', 'LFacebook_Impressions.diff', 'LFacebook_Impressions.diff Lower', 'LFacebook_Impressions.diff Upper')
Facebook_Impressions.irf_table
##   Period LFacebook_Impressions.diff LFacebook_Impressions.diff Lower
## 1      1                    -0.0109                          -0.0778
## 2      2                     0.0440                          -0.0287
## 3      3                    -0.0235                          -0.0619
## 4      4                     0.0125                          -0.0081
## 5      5                    -0.0055                          -0.0205
## 6      6                     0.0024                          -0.0028
## 7      7                    -0.0009                          -0.0069
## 8      8                     0.0003                          -0.0016
##   LFacebook_Impressions.diff Upper
## 1                           0.0658
## 2                           0.1114
## 3                           0.0071
## 4                           0.0362
## 5                           0.0057
## 6                           0.0132
## 7                           0.0025
## 8                           0.0046
## Affiliate Impressions
# IRF point estimates and bootstrap 95% CI bounds for affiliate impressions.
Affiliate_Impressions.irf_table <- round(data.frame(period = seq(1, 8), 
                           response.Affiliate_Impressions = irfs$irf$LAffiliate_Impressions.diff, 
                           Affiliate_Impressions.lower = irfs$Lower$LAffiliate_Impressions.diff, 
                           Affiliate_Impressions.upper = irfs$Upper$LAffiliate_Impressions.diff),4)
colnames(Affiliate_Impressions.irf_table) <- c('Period', 'LAffiliate_Impressions.diff', 'LAffiliate_Impressions.diff Lower', 'LAffiliate_Impressions.diff Upper')
Affiliate_Impressions.irf_table
##   Period LAffiliate_Impressions.diff LAffiliate_Impressions.diff Lower
## 1      1                     -0.0166                           -0.0760
## 2      2                     -0.0115                           -0.0733
## 3      3                      0.0158                           -0.0346
## 4      4                     -0.0117                           -0.0431
## 5      5                      0.0077                           -0.0165
## 6      6                     -0.0044                           -0.0159
## 7      7                      0.0024                           -0.0061
## 8      8                     -0.0012                           -0.0055
##   LAffiliate_Impressions.diff Upper
## 1                            0.0430
## 2                            0.0516
## 3                            0.0595
## 4                            0.0233
## 5                            0.0264
## 6                            0.0101
## 7                            0.0091
## 8                            0.0036

We can then apply the t>1 criterion to determine coefficient significance and calculate long-term elasticities of the different advertising spending.

# Paid Views
# Keep only IRF coefficients whose approximate t-statistic exceeds 1; the
# rest are zeroed. NOTE(review): this is a one-sided rule — significantly
# negative responses are also discarded, which matches the text's focus on
# "significant and positive" effects, but confirm that is intended.
Paid_Views.irf_results <- matrix(nrow = 8, ncol = 1)

for (i in 1:8) {
  # Back out the standard error from the width of the bootstrap 95% CI.
  se <- (irfs$Upper$LPaid_Views.diff[i]-irfs$Lower$LPaid_Views.diff[i])/(2*1.96)
  Paid_Views.irf_t <- irfs$irf$LPaid_Views.diff[i]/se
   
   if (Paid_Views.irf_t>1) {
    Paid_Views.irf_results[i] <- irfs$irf$LPaid_Views.diff[i]
   } else {
      Paid_Views.irf_results[i] <-0
      }
}

Paid_Views.irf_results
##      [,1]
## [1,]    0
## [2,]    0
## [3,]    0
## [4,]    0
## [5,]    0
## [6,]    0
## [7,]    0
## [8,]    0
# Long-run elasticity = sum of the retained responses across all 8 periods.
lr_paid_views <- sum(Paid_Views.irf_results)
lr_paid_views
## [1] 0

Once the t>1 rule is applied, we find that paid views advertising has no significant and positive impact in any of the eight periods studied. A 1% increase in paid views advertising spending will increase the firm’s sales by 0% in the long run.

# Organic Views
# Same t>1 screening as for paid views: retain a period's IRF coefficient
# only when its approximate t-statistic (point estimate over CI-implied
# standard error) exceeds 1, otherwise set it to 0.
Organic_Views.irf_results <- matrix(nrow = 8, ncol = 1)

for (i in 1:8) {
  # Standard error implied by the bootstrap 95% CI width.
  se <- (irfs$Upper$LOrganic_Views.diff[i]-irfs$Lower$LOrganic_Views.diff[i])/(2*1.96)
  Organic_Views.irf_t <- irfs$irf$LOrganic_Views.diff[i]/se
   
   if (Organic_Views.irf_t>1) {
    Organic_Views.irf_results[i] <- irfs$irf$LOrganic_Views.diff[i]
   } else {
      Organic_Views.irf_results[i] <-0
      }
}

Organic_Views.irf_results
##      [,1]
## [1,]    0
## [2,]    0
## [3,]    0
## [4,]    0
## [5,]    0
## [6,]    0
## [7,]    0
## [8,]    0
# Long-run elasticity = sum of the retained responses.
lr_organic_views <- sum(Organic_Views.irf_results)
lr_organic_views
## [1] 0

Similarly to paid views advertising, the organic views advertising has no significant and positive impact. A 1% increase in organic views advertising spending will increase the firm’s sales by 0% in the long run.

# Google Impressions
# t>1 screening of the Google-impressions IRF coefficients (see the paid
# views block for the rationale).
Google_Impressions.irf_results <- matrix(nrow = 8, ncol = 1)

for (i in 1:8) {
  # Standard error implied by the bootstrap 95% CI width.
  se <- (irfs$Upper$LGoogle_Impressions.diff[i]-irfs$Lower$LGoogle_Impressions.diff[i])/(2*1.96)
  Google_Impressions.irf_t <- irfs$irf$LGoogle_Impressions.diff[i]/se
   
   if (Google_Impressions.irf_t>1) {
    Google_Impressions.irf_results[i] <- irfs$irf$LGoogle_Impressions.diff[i]
   } else {
      Google_Impressions.irf_results[i] <-0
      }
}

Google_Impressions.irf_results
##      [,1]
## [1,]    0
## [2,]    0
## [3,]    0
## [4,]    0
## [5,]    0
## [6,]    0
## [7,]    0
## [8,]    0
# Long-run elasticity = sum of the retained responses.
lr_google_impressions <- sum(Google_Impressions.irf_results)
lr_google_impressions
## [1] 0

The Google impressions advertising has no significant and positive impact with a 1% increase in Google impressions advertising spending increasing the firm’s sales by 0% in the long run.

# Email Impressions
# t>1 screening of the email-impressions IRF coefficients (note this impulse
# is in log-levels, matching the irf() call).
Email_Impressions.irf_results <- matrix(nrow = 8, ncol = 1)

for (i in 1:8) {
  # Standard error implied by the bootstrap 95% CI width.
  se <- (irfs$Upper$LEmail_Impressions[i]-irfs$Lower$LEmail_Impressions[i])/(2*1.96)
  Email_Impressions.irf_t <- irfs$irf$LEmail_Impressions[i]/se
   
   if (Email_Impressions.irf_t>1) {
    Email_Impressions.irf_results[i] <- irfs$irf$LEmail_Impressions[i]
   } else {
      Email_Impressions.irf_results[i] <-0
      }
}

Email_Impressions.irf_results
##            [,1]
## [1,] 0.05409493
## [2,] 0.00000000
## [3,] 0.02602730
## [4,] 0.00000000
## [5,] 0.00000000
## [6,] 0.00000000
## [7,] 0.00000000
## [8,] 0.00000000
# Long-run elasticity: periods 1 and 3 survive the screen, summing to ~0.08.
lr_email_impressions <- sum(Email_Impressions.irf_results)
lr_email_impressions
## [1] 0.08012222

The email impressions advertising has a significant and positive impact in the first and third periods. A 1% increase in email impressions advertising spending will increase the firm’s sales by 0.08% in the long run.

# Facebook Impressions
# t>1 screening of the facebook-impressions IRF coefficients.
Facebook_Impressions.irf_results <- matrix(nrow = 8, ncol = 1)

for (i in 1:8) {
  # Standard error implied by the bootstrap 95% CI width.
  se <- (irfs$Upper$LFacebook_Impressions.diff[i]-irfs$Lower$LFacebook_Impressions.diff[i])/(2*1.96)
  Facebook_Impressions.irf_t <- irfs$irf$LFacebook_Impressions.diff[i]/se
   
   if (Facebook_Impressions.irf_t>1) {
    Facebook_Impressions.irf_results[i] <- irfs$irf$LFacebook_Impressions.diff[i]
   } else {
      Facebook_Impressions.irf_results[i] <-0
      }
}

Facebook_Impressions.irf_results
##            [,1]
## [1,] 0.00000000
## [2,] 0.04401091
## [3,] 0.00000000
## [4,] 0.01250019
## [5,] 0.00000000
## [6,] 0.00000000
## [7,] 0.00000000
## [8,] 0.00000000
# Long-run elasticity: periods 2 and 4 survive the screen, summing to ~0.057.
lr_facebook_impressions <- sum(Facebook_Impressions.irf_results)
lr_facebook_impressions
## [1] 0.0565111

The facebook impressions advertising has a significant and positive impact in periods 2 and 4. A 1% increase in facebook impressions advertising spending will increase the firm’s sales by 0.057% in the long run.

# Affiliate Impressions
# t>1 screening of the affiliate-impressions IRF coefficients.
Affiliate_Impressions.irf_results <- matrix(nrow = 8, ncol = 1)

for (i in 1:8) {
  # Standard error implied by the bootstrap 95% CI width.
  se <- (irfs$Upper$LAffiliate_Impressions.diff[i]-irfs$Lower$LAffiliate_Impressions.diff[i])/(2*1.96)
  Affiliate_Impressions.irf_t <- irfs$irf$LAffiliate_Impressions.diff[i]/se
   
   if (Affiliate_Impressions.irf_t>1) {
    Affiliate_Impressions.irf_results[i] <- irfs$irf$LAffiliate_Impressions.diff[i]
   } else {
      Affiliate_Impressions.irf_results[i] <-0
      }
}

Affiliate_Impressions.irf_results
##      [,1]
## [1,]    0
## [2,]    0
## [3,]    0
## [4,]    0
## [5,]    0
## [6,]    0
## [7,]    0
## [8,]    0
# Long-run elasticity = sum of the retained responses.
lr_affiliate_impressions <- sum(Affiliate_Impressions.irf_results)
lr_affiliate_impressions
## [1] 0

The affiliate impressions advertising has no significant and positive impact. A 1% increase in affiliate impressions advertising spending will thus increase the firm’s sales by 0% in the long run.

Optimal budget allocation

# Current budget allocation
# Total historical activity per channel (sum over all rows of `data`) is used
# as a proxy for the spend on that channel.
cost_paid_views <- sum(data$Paid_Views)
cost_organic_views <- sum(data$Organic_Views)
cost_google_impressions <- sum(data$Google_Impressions)
cost_email_impressions <- sum(data$Email_Impressions)
cost_facebook_impressions <- sum(data$Facebook_Impressions)
cost_affiliate_impressions <- sum(data$Affiliate_Impressions)
# FIX: the original total added cost_paid_views twice and omitted
# cost_organic_views; each channel must enter the total exactly once,
# otherwise every share below (and the pie chart) is skewed.
cost_total <- cost_paid_views + cost_organic_views + cost_google_impressions +
  cost_email_impressions + cost_facebook_impressions + cost_affiliate_impressions

# Share of current budget allocation
costshare_paid_views <- cost_paid_views/cost_total
costshare_organic_views <- cost_organic_views/cost_total
costshare_google_impressions <- cost_google_impressions/cost_total
costshare_email_impressions <- cost_email_impressions/cost_total
costshare_facebook_impressions <- cost_facebook_impressions/cost_total
costshare_affiliate_impressions <- cost_affiliate_impressions/cost_total

# Pie Chart
# Build "Channel NN%" labels, one per channel, in the same order as the shares.
actual_shares <- c(costshare_paid_views, costshare_organic_views, costshare_google_impressions, costshare_email_impressions, costshare_facebook_impressions, costshare_affiliate_impressions)
labels <- c("Paid_Views", "Organic_Views", "Google_Impressions", "Email_Impressions", "Facebook_Impressions", "Affiliate_Impressions")
actual_percentages <- round(actual_shares*100)
labels <- paste(labels, actual_percentages)
labels <- paste(labels, "%", sep="")

# Get the pie-chart
pie(actual_shares, labels=labels, main="Current Budget Allocation")

We now figure out the optimal budget allocation.

# Get the coefficients from IRF results
# Long-run elasticities estimated above, one per marketing channel.
beta_paid_views <- lr_paid_views
beta_organic_views <- lr_organic_views
beta_google_impressions <- lr_google_impressions
beta_email_impressions <- lr_email_impressions
beta_facebook_impressions <- lr_facebook_impressions
beta_affiliate_impressions <- lr_affiliate_impressions

# The sum of all elasticities (normalising constant for the shares).
beta_all <- sum(c(beta_paid_views, beta_organic_views,
                  beta_google_impressions, beta_email_impressions,
                  beta_facebook_impressions, beta_affiliate_impressions))

# Optimal resource allocation: each channel receives a budget share equal to
# its long-run elasticity relative to the total elasticity.
optim_paid_views <- beta_paid_views / beta_all
optim_organic_views <- beta_organic_views / beta_all
optim_google_impressions <- beta_google_impressions / beta_all
optim_email_impressions <- beta_email_impressions / beta_all
optim_facebook_impressions <- beta_facebook_impressions / beta_all
optim_affiliate_impressions <- beta_affiliate_impressions / beta_all

This optimal budget allocation can be summarised in a new pie chart.

# Optimal spending
# Collect the optimal shares, round them for display, and plot them as a
# pie chart with "Channel NN%" labels (mirroring the current-allocation chart).
optimal_spend <- c(optim_paid_views, optim_organic_views, optim_google_impressions, optim_email_impressions, optim_facebook_impressions, optim_affiliate_impressions)
optimal_spend = round(optimal_spend, digits=5)
optimal_spend
## [1] 0.0000 0.0000 0.0000 0.5864 0.4136 0.0000
optimal_shares <- c(optim_paid_views, optim_organic_views, optim_google_impressions, optim_email_impressions, optim_facebook_impressions, optim_affiliate_impressions)
optimal_labels<-c("Paid View", "Organic Views", "Google Impressions", "Email Impressions", "Facebook Impressions", "Affiliate Impressions")
optimal_percentages <- round(optimal_shares*100)
optimal_labels <- paste(optimal_labels, optimal_percentages)
optimal_labels <- paste(optimal_labels, "%", sep="")

# Get the pie-chart
pie(optimal_shares, labels=optimal_labels, main="Optimal Budget Allocation" )

The firm should focus their marketing spending only on email and facebook impressions.